# Aggregated and atomic scores per method
# datasets = read_yaml("datasets.yml")
# print(score_file)
# datasets = read_yaml("datasets.yml")
# datasets = read_yaml(file_dataset)
# Locate the late-integration score files. Where they live depends on which
# workflow engine launched this script, which is detected from the name of the
# current working directory.
#
# `score_files` is kept as a one-element list wrapping the character vector of
# paths, because the reading loop below iterates over `score_files[[1]]`.
list_wd <- strsplit(getwd(), "/")[[1]]
if (list_wd[length(list_wd)] == "hadaca3_framework") {
  # Snakemake script: the current working dir is hadaca3_framework
  score_files <- list(
    list.files(path = "./output/scores/", full.names = TRUE)
  )
} else {
  # Nextflow script: the score files sit in the current working directory.
  # `pattern` is a regular expression, not a glob. The former "score-li*"
  # meant "score-l" followed by any number of "i", matched anywhere in the
  # file name, so it also picked up unrelated files. Anchor on the prefix.
  score_files <- list(list.files(pattern = "^score-li"))
}
# Zero-row template of the score table: 15 text columns identifying the data
# and the pipeline steps, followed by 25 numeric score columns.
label_columns <- c(
  "dataset", "ref",
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA",
  "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET",
  "deconvolution_met",
  "late_integration"
)
metric_columns <- c(
  "aid", "aid_norm",
  "aitchison", "aitchison_norm",
  "jsd", "jsd_norm",
  "mae", "mae_norm",
  "pearson_col", "pearson_col_norm",
  "pearson_row", "pearson_row_norm",
  "pearson_tot", "pearson_tot_norm",
  "rmse", "rmse_norm",
  "score_aggreg",
  "sdid", "sdid_norm",
  "spearman_col", "spearman_col_norm",
  "spearman_row", "spearman_row_norm",
  "spearman_tot", "spearman_tot_norm"
)

# One empty vector per column, named, then handed to data.frame() exactly as
# if every column had been spelled out as `name = character()/numeric()`.
empty_columns <- c(
  rep(list(character()), length(label_columns)),
  rep(list(numeric()), length(metric_columns))
)
names(empty_columns) <- c(label_columns, metric_columns)
results_li <- do.call(data.frame, empty_columns)
# Read every score file and append one row per file to `results_li`: the 15
# labels parsed from the file name, followed by whatever read_hdf5() returns
# for that file.
#
# Expected file-name layout (one capture group per <...> field):
#   score-li-<dataset>_<ref>
#     _mixRNA_<preprocessing>_<feature_selection>
#     _RNA_<preprocessing>_<feature_selection>
#     _scRNA_<preprocessing>_<feature_selection>_<deconvolution_rna>
#     _mixMET_<preprocessing>_<feature_selection>
#     _MET_<preprocessing>_<feature_selection>_<deconvolution_met>
#     _<late_integration>.h5
# The dot of the extension is escaped so that it only matches a literal ".".
score_name_regex <- paste0(
  "score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)",
  "_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+)\\.h5"
)

# Rows are collected in a preallocated list and bound once after the loop;
# calling rbind() on the growing table at every iteration copies the whole
# table each time.
score_paths <- score_files[[1]]
score_rows <- vector("list", length(score_paths))
for (file_idx in seq_along(score_paths)) {
  score_file <- score_paths[[file_idx]]
  # Extract the base name of the file
  base_name <- basename(score_file)
  # Column 1 of the match matrix is the full match; columns 2..16 are the 15
  # capture groups. All of them are NA when the name does not match.
  components <- str_match(base_name, score_name_regex)[1, 2:16]
  scores <- read_hdf5(score_file)
  score_rows[[file_idx]] <- cbind(
    data.frame(
      dataset = components[1],
      ref = components[2],
      preprocessing_mixRNA = components[3],
      feature_selection_mixRNA = components[4],
      preprocessing_RNA = components[5],
      feature_selection_RNA = components[6],
      preprocessing_scRNA = components[7],
      feature_selection_scRNA = components[8],
      deconvolution_rna = components[9],
      preprocessing_mixMET = components[10],
      feature_selection_mixMET = components[11],
      preprocessing_MET = components[12],
      feature_selection_MET = components[13],
      deconvolution_met = components[14],
      late_integration = components[15],
      stringsAsFactors = FALSE
    ),
    scores
  )
}
results_li <- do.call(rbind, c(list(results_li), score_rows))
rownames(results_li) <- NULL
# Number of score files processed (same final value as the former counter).
i <- length(score_paths)
# Median aggregated score of each late-integration method, best first.
# The last expression is left unassigned so that the ranking is printed.
late_integration_scores <- summarise(
  group_by(results_li, late_integration),
  GlobalScore = median(score_aggreg)
)
arrange(late_integration_scores, desc(GlobalScore))
#> # A tibble: 3 × 2
#> late_integration GlobalScore
#> <chr> <dbl>
#> 1 OnlyMet 0.663
#> 2 limeanRMSE 0.660
#> 3 OnlyRna 0.646
# Median aggregated score of every complete pipeline (one group per
# combination of the 13 pipeline steps), best first.
#
# `.groups = "keep"` belongs to summarise(). Passed to group_by(), it is not
# an option: it creates an extra grouping column literally named `.groups`
# holding the constant "keep", and summarise() still prints its
# "has grouped output by ..." message. With the argument in the right place
# the result keeps the 13 grouping columns, has no spurious `.groups` column,
# and no message is emitted.
results_li_arrange <- results_li %>%
  group_by(
    preprocessing_mixRNA, feature_selection_mixRNA,
    preprocessing_RNA, feature_selection_RNA,
    preprocessing_scRNA, feature_selection_scRNA, deconvolution_rna,
    preprocessing_mixMET, feature_selection_mixMET,
    preprocessing_MET, feature_selection_MET, deconvolution_met,
    late_integration
  ) %>%
  summarise(GlobalScore = median(score_aggreg), .groups = "keep") %>%
  arrange(desc(GlobalScore))
# Turn the data-identifying columns into factors. Levels follow the order in
# which each value first appears in the table (that is what unique() returns).
all_data_used <- c("dataset", "ref")
results_li[all_data_used] <- lapply(
  results_li[all_data_used],
  function(values) factor(values, levels = unique(values))
)
# Turn every pipeline-step column into a factor whose levels are ranked by the
# aggregated score obtained on the 'invitro1' dataset (best first).
all_functions_li <- c(
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)

# The ranking must be computed and applied on the same rows. Previously the
# order() indices were computed on the invitro1 subset but used to index the
# full column, so they pointed at unrelated rows, and any value missing from
# those rows was dropped from the levels and silently became NA.
invitro_rows <- which(results_li$dataset == "invitro1")
invitro_rank <- order(results_li$score_aggreg[invitro_rows], decreasing = TRUE)
for (fun in all_functions_li) {
  step_values <- as.character(results_li[[fun]])
  ranked_levels <- unique(step_values[invitro_rows][invitro_rank])
  # Values never seen on invitro1 are appended (in order of appearance) so
  # that they remain valid levels instead of turning into NA.
  other_levels <- setdiff(unique(step_values), ranked_levels)
  results_li[[fun]] <- factor(
    step_values,
    levels = c(ranked_levels, other_levels)
  )
}
# Interactive table of the pipeline-step columns plus the aggregated score.
# The step columns are selected by position: they start at column 3, right
# after `dataset` and `ref`, and there is one per entry of `all_functions_li`.
function_columns <- seq_along(all_functions_li) + 2
index_aggreg <- which(names(results_li) == "score_aggreg")

# Button that lets the reader toggle every column except the first one.
column_toggle_button <- list(
  extend = "colvis",
  text = "Show/Hide Columns",
  columns = ":not(:first-child)"
)

datatable(
  results_li[, c(function_columns, index_aggreg)],
  extensions = "Buttons",
  options = list(
    pageLength = 10,
    # 'B' in `dom` places the Buttons extension in the table layout
    dom = "Bfrtip",
    buttons = list(column_toggle_button)
  )
)